import pandas as pd

# Team roster: member names paired with their SMU IDs.
member_names = ['Zhenxuan Ouyang', 'Jianmin Li']
member_ids = [47784546, 47794972]
teaminfo = pd.DataFrame({'name': member_names, 'smu id': member_ids})
teaminfo
We think it will be useful to use DeepDream to analyze a CNN visually. Even though we have different CNNs to classify different objects, and some of them are very powerful, we still don't know why a network behaves the way it does when classifying objects, or what it learns in order to make its final decision. To understand the internal knowledge the network learns, we need a method such as DeepDream to analyze the network.
One thing DeepDream can tell us is what the network detects, because whatever it detects it enhances. If we feed in a picture of an aircraft that looks like a bird, DeepDream detects and enhances the bird-like features; if we feed in a picture of a boat that looks like a fish, DeepDream detects and enhances the fish-like features. If we choose a higher layer, which represents complex and sophisticated features, then we can learn what things the network detects and what object the network thinks it sees by using DeepDream.
Using DeepDream to analyze a CNN not only shows us which parts of an image the network considers important, but also reveals where the network's weaknesses are and how to improve it. All in all, DeepDream lets us learn more about what is going on inside the CNN.
In this lab, we choose VGG16 as our neural network model that has been pre-trained on ImageNet. The VGG network architecture was introduced by Simonyan and Zisserman in their 2014 paper, "Very Deep Convolutional Networks for Large Scale Image Recognition" (Reference: https://www.pyimagesearch.com/2017/03/20/imagenet-vggnet-resnet-inception-xception-keras/) The VGG network is famous for its simplicity, using 3x3 convolutional layers. It has two fully-connected layers with 4096 nodes in each layer followed by softmax classifier. 16 means VGG16 has 16 weight layers.
The main reason we chose VGG16 is its simplicity compared to other networks. It has 23 layers, while other networks have more than 100. Besides, the accuracy difference between VGG16 and the other networks is only slight. For simple object classification, we think there is no big difference between VGG16 and the other networks.
First we import Keras and we choose VGG16 as our pre-trained model on ImageNet dataset.
import keras
keras.__version__
from keras.models import load_model
from keras.applications.vgg16 import VGG16
# from keras.applications.xception import Xception
from keras import backend as K
from IPython.display import clear_output
# Load VGG16 pre-trained on ImageNet, without the fully-connected classifier
# head (include_top=False): DeepDream only needs the convolutional feature maps.
model = VGG16(weights='imagenet', include_top=False)
# model = Xception(weights='imagenet', include_top=False)
model.summary() # As a reminder.
Then we choose one picture as the input to our DeepDream network. The picture shows an aircraft with the sky as its background.
import copy
# Input picture for DeepDream: an aircraft against a sky background.
img_url = 'https://images.fineartamerica.com/images/artworkimages/mediumlarge/1/f18-fighter-jet-aaron-berg.jpg'
# We preprocess the image into a 4D tensor
from keras.preprocessing import image
import numpy as np
import requests
from io import BytesIO
# use this to install PIL: conda install --channel conda-forge pillow=5
from PIL import Image
import matplotlib.pyplot as plt
def load_preprocess_img(url):
    """Download an image from *url* and preprocess it into a 4D float tensor.

    Returns an array of shape (1, height, width, channels) with pixel
    values scaled to [0, 1], ready to feed into the network.
    """
    response = requests.get(url)
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    response.raise_for_status()
    img = Image.open(BytesIO(response.content))
    # Force 3 channels so grayscale or RGBA downloads don't break the
    # (batch, h, w, 3) assumption made by the rest of the pipeline.
    img = img.convert('RGB')
    img_tensor = np.array(img).astype(float)
    img_tensor = np.expand_dims(img_tensor, axis=0)  # add batch dimension
    img_tensor /= 255.
    return img_tensor
# Download and preprocess the input image, keep an untouched copy for the
# dream loop, and display the original picture.
img_tensor = load_preprocess_img(img_url)
print(img_tensor.shape)
# Pristine copy of the input; the dream function reads this module-level global.
img_drawing = copy.deepcopy(img_tensor)
plt.imshow(img_tensor[0])
plt.show()
Now we start implementing the DeepDream process. We defined four different functions to implement this process.
from keras.preprocessing.image import load_img, save_img, img_to_array
import scipy
from IPython.display import clear_output
from tqdm.notebook import tqdm
import cv2
# ref: https://keras.io/examples/deep_dream/
def resize_img(img, size):
    """Return a copy of the 4D image tensor `img` rescaled to `size`.

    `size` is (height, width); the batch and channel axes are untouched.
    Uses order-1 (bilinear) spline interpolation.
    """
    tensor = np.copy(img)
    zoom_factors = (
        1,                                 # batch axis unchanged
        float(size[0]) / tensor.shape[1],  # height scale
        float(size[1]) / tensor.shape[2],  # width scale
        1,                                 # channel axis unchanged
    )
    return scipy.ndimage.zoom(tensor, zoom_factors, order=1)
def deprocess_image(x):
    """Convert a float image tensor into a displayable uint8 RGB array.

    Normalizes to zero mean / std 0.1, recenters on 0.5, clips to [0, 1],
    and scales to 0-255 bytes.

    Fix: works on a copy. The original mutated `x` in place through the
    augmented assignments, so calling it on a view of the gradient-ascent
    buffer (deprocess_image(input_img_data[0])) corrupted the ongoing
    dream image every iteration.
    """
    x = np.array(x, dtype=float)  # copy; never mutate the caller's buffer
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1
    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)
    # convert to RGB array
    x *= 255
    x = np.clip(x, 0, 255).astype('uint8')
    return x
def generate_random_size(img, resize_number):
    """Build the octave schedule for the dream loop.

    Returns a list of (height, width) shapes ordered smallest to largest;
    the last entry is the original image shape. Each successive octave
    shrinks the original dimensions by a factor of 1.5.

    Fix: the original loop started at i=0, so resize_ratio**0 == 1
    appended a duplicate of the full-size shape, wasting an entire
    gradient-ascent pass at full resolution.
    """
    original_shape = img.shape[1:3]
    resize_list = [original_shape]
    resize_ratio = 1.5
    # Start at 1: i=0 would just repeat the original shape.
    for i in range(1, resize_number + 1):
        step_shape = tuple(int(dimension / (resize_ratio ** i))
                           for dimension in original_shape)
        resize_list.append(step_shape)
    return resize_list[::-1]  # smallest octave first
def generate_deepdream_random_resize(layer_name, iteration_number, step, resize_number, noise=False):
    """Run the DeepDream gradient-ascent loop over a pyramid of image sizes.

    Parameters:
        layer_name: name of the layer in the global `model` whose mean
            activation is maximized.
        iteration_number: gradient-ascent steps per octave.
        step: gradient-ascent step size.
        resize_number: number of shrunken octaves passed to
            generate_random_size.
        noise: if True, add Gaussian noise (std 0.3) to the image at the
            start of each octave.

    Reads the module-level globals `model`, `K`, and `img_drawing`
    (the preprocessed input image). Returns a pair of lists of uint8
    arrays: (per-iteration frames, end-of-octave frames).

    NOTE(review): uses the TF1-style K.gradients/K.function API, so this
    requires Keras running without eager execution — confirm the runtime.
    """
    # Build a loss function that maximizes the activation
    # of the nth filter of the layer considered.
    layer_output = model.get_layer(layer_name).output
    loss = K.mean(layer_output[:, :, :, :])
    # Compute the gradient of the input picture wrt this loss
    grads = K.gradients(loss, model.input)[0]
    # Normalization trick: we normalize the gradient with L1-normalization
    # (keeps the ascent step size stable across layers).
    grads /= K.mean(K.abs(grads)) + 1e-5
    # This function returns the loss and grads given the input picture
    iterate = K.function([model.input], [loss, grads])
    # Work on a deep copy so the pristine global image is never modified.
    input_img_data = copy.deepcopy(img_drawing)
    iteration_img_list = list()
    resize_iteration_list = list()
    resize_list = generate_random_size(input_img_data, resize_number)
    shrunk_original_image = resize_img(input_img_data, resize_list[0])
    for theShape in resize_list:  # process octaves smallest to largest
        input_img_data = resize_img(input_img_data, theShape)
        # add gaussian noise
        if noise==True:
            input_img_data += 1.0*np.random.normal(0, 0.3, input_img_data.shape)
        # gradient ascent
        for i in tqdm(range(iteration_number)):
            loss_value, grads_value = iterate([input_img_data])
            input_img_data += grads_value * step
            iteration_img_list.append(deprocess_image(input_img_data[0]))
        # upscale the shrunk image
        upscale_shrunk_img = resize_img(shrunk_original_image, theShape)
        # resize the original image into the same size
        same_size_original = resize_img(img_drawing, theShape)
        # calculate the lost detail
        lost_diff = same_size_original - upscale_shrunk_img
        # add the lost detail (re-injects high-frequency content lost by
        # the earlier downscaling)
        input_img_data += lost_diff
        # reset the shrunk image to current size as input to the next resize shape
        shrunk_original_image = resize_img(img_drawing, theShape)
        # save the image into list
        resize_iteration_list.append(deprocess_image(input_img_data[0]))
    return iteration_img_list, resize_iteration_list
We put our picture through the DeepDream we implemented. We visualized layer "block5_conv3", using 15 iterations, a step size of 0.05, and 3 shrinking steps.
Then we save the image at every iteration and visualize it. Now let's see what things are added to our image by DeepDream.
%%time
# Dream on layer block5_conv3: 15 gradient-ascent iterations per octave,
# step size 0.05, 3 shrunken octaves, no added noise.
layer_name = 'block5_conv3'
#without noise
img_list_vgg_without_noise, resize_img_list_vgg_without_noise = generate_deepdream_random_resize(
    layer_name=layer_name,
    iteration_number=15,
    step=0.05,
    resize_number=3,
    noise=False)
import glob
import cv2
import numpy as np
def save_img(target_path, img_list, start_index, end_index):
    """Write img_list[start_index:end_index] to target_path as PNGs and plot them.

    Files are named 'image-<letter>-<digit>.png' (letter = tens, digit =
    units, e.g. index 14 -> 'image-b-4.png') so a plain alphabetical sort
    reproduces iteration order, which generate_video_from_images relies on.

    Fixes: the original ignored its `target_path` parameter and wrote to
    the global `image_path` instead; it also assumed the output directory
    already existed (cv2.imwrite fails silently when it doesn't).
    NOTE: this shadows keras' `save_img` imported earlier in the file.
    """
    import os
    os.makedirs(target_path, exist_ok=True)
    plt.figure(figsize=(50, 100))
    for i in range(start_index, end_index):
        # letter encodes the tens digit, trailing digit the units
        index = int(np.floor(i / 10))
        saved_image_filename = 'image-' + str(chr(index + ord('a'))) + '-' + str(i % 10) + '.png'
        cv2.imwrite(target_path + saved_image_filename, img_list[i])
        plt.subplot(end_index - start_index, 1, i - start_index + 1)
        plt.imshow(img_list[i])
# save without-noise images to local and display it
# Output directory for the without-noise frames of this model.
image_path = './images/'+model.name+'/without-noise/'
save_img(target_path=image_path, img_list=img_list_vgg_without_noise, start_index=0, end_index=15)
We saved our image at every iteration. As we can see, the original image was an aircraft, but as the number of iterations goes up, DeepDream adds birds to it; in the last image we can hardly find the original aircraft, because the aircraft has become a bird.
From the above, we can see that this DeepDream works and we get what we wanted. First, the background is sky, so we expected DeepDream to add something related to the sky. Second, the original aircraft in the image looks like a bird, so we also expected the aircraft to look even more like a bird after going through DeepDream.
We also made a small video to show the iterative process. The video is also submitted with the html file.
In case the generated video file is broken, we also created a GIF file to show the DeepDream process at each iteration.
def generate_video_from_images(image_path, target_path, layer_name):
    """Stitch the PNGs under image_path into an MJPG .avi at target_path.

    The frame size is taken from the images themselves; alphabetical sort
    of the filenames matches iteration order thanks to the
    'image-<letter>-<digit>.png' naming scheme.

    Fixes: guards against an empty image directory (the original crashed
    constructing VideoWriter with size=()), creates the target directory,
    and caps the frame count at the number of images actually found
    instead of assuming 15 exist.
    """
    import os
    # Collect and order the saved frames.
    filename_array = sorted(glob.glob(image_path + '*.png'))
    if not filename_array:
        print('no images found under ' + image_path)
        return
    img_array = list()
    size = tuple()
    for filename in filename_array:
        img = cv2.imread(filename)
        height, width, layers = img.shape
        size = (width, height)  # VideoWriter expects (width, height)
        img_array.append(img)
    os.makedirs(target_path, exist_ok=True)
    out = cv2.VideoWriter(target_path+'deepdream-'+layer_name+'.avi',
                          cv2.VideoWriter_fourcc(*'MJPG'), 3, size)
    output_video_img_number = 15
    # Write at most 15 frames, never indexing past what was loaded.
    for i in range(min(output_video_img_number, len(img_array))):
        out.write(img_array[i])
    out.release()
    print('done')
# generate video from images
# Build an .avi from the saved without-noise frames.
image_path = './images/'+model.name+'/without-noise/'
video_path = './videos/'+model.name+'/without-noise/'
generate_video_from_images(image_path=image_path, target_path=video_path, layer_name=layer_name)
We add Gaussian noise to the image every time we add back the lost detail. We hope that doing so makes DeepDream more robust.
%%time
# Same dream configuration as before, but with Gaussian noise added at
# the start of each octave (noise=True).
layer_name = 'block5_conv3'
# with noise
img_list_vgg_with_noise, resize_img_list_vgg_with_noise = generate_deepdream_random_resize(
    layer_name=layer_name,
    iteration_number=15,
    step=0.05,
    resize_number=3,
    noise=True)
# save noise images to local and display it
image_path = './images/'+model.name+'/with-noise/'
save_img(target_path=image_path, img_list=img_list_vgg_with_noise, start_index=0, end_index=15)
# generate video from images
# Build the .avi for the with-noise run from the frames just saved.
image_path = './images/'+model.name+'/with-noise/'
video_path = './videos/'+model.name+'/with-noise/'
generate_video_from_images(image_path=image_path, target_path=video_path, layer_name=layer_name)
This is the result after we add noise. We cannot easily see whether there are many differences from the non-noise one, so we decided to compare them statistically using Structural Similarity.
from skimage.measure import compare_ssim as ssim
import matplotlib.pyplot as plt
import numpy as np
import cv2
def mse(imageA, imageB):
    """Mean squared error between two equal-sized images.

    The sum of squared pixel differences is averaged over height * width
    (channel differences are summed, matching the reference snippet).
    Lower values mean more similar images.
    """
    diff = imageA.astype("float") - imageB.astype("float")
    total_sq_error = np.sum(diff ** 2)
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return total_sq_error / pixel_count
def compare_images(imageA, imageB, title):
    """Display two images side by side, titled with their MSE and SSIM."""
    # Compute both similarity metrics for the figure title.
    mse_value = mse(imageA, imageB)
    ssim_value = ssim(imageA, imageB, multichannel=True)
    fig = plt.figure(title)
    plt.suptitle("MSE: %.2f, SSIM: %.2f" % (mse_value, ssim_value))
    # Left panel: first image.
    fig.add_subplot(1, 2, 1)
    plt.imshow(imageA, cmap=plt.cm.gray)
    plt.axis("off")
    # Right panel: second image.
    fig.add_subplot(1, 2, 2)
    plt.imshow(imageB, cmap=plt.cm.gray)
    plt.axis("off")
    plt.show()
# Compare the same mid-run frame (index 14 -> file 'image-b-4.png') from
# the without-noise and with-noise runs.
non_noise = cv2.imread("./images/vgg16/without-noise/image-b-4.png")
noise = cv2.imread('./images/vgg16/with-noise/image-b-4.png')
compare_images(non_noise, noise, 'Non-Noise vs Noise')
# Side-by-side view of the final (15th) frame of each run.
print("VGG model processing comparison")
plt.figure(figsize=(10,8))
plt.subplot(1,2,1)
plt.title('Without noise')
plt.imshow(img_list_vgg_without_noise[14])
plt.subplot(1,2,2)
plt.title('With noise')
plt.imshow(img_list_vgg_with_noise[14])
The image on the left is the without-noise one and the image on the right is the with-noise one. We calculated MSE and SSIM; the statistics show the two are quite different. Visually, however, we can still see birds in the background in both, and the aircraft becomes something similar to a bird. The non-noise one has a better and clearer bird shape — especially at the head of the aircraft, the non-noise run transforms the aircraft into a bird better. To watch the transformation clearly, see the pictures and videos in the folders 'images/vgg/' and 'videos/vgg/'.